Gibbs Sampling of bivariate normal

rho = 0.8
y = c(0, 0)
Niter = 1000
theta = array(0, c(2, Niter))
theta[, 1] = c(5, 8) # starting point, change it to different values
for(k in 2:Niter){
  theta[1, k] = y[1] + rho * (theta[2, k-1] - y[2]) + sqrt(1 - rho^2) * rnorm(1)
  theta[2, k] = y[2] + rho * (theta[1, k] - y[1]) + sqrt(1 - rho^2) * rnorm(1)
}
# visualize trace (2dim)
plot(t(theta), type = 'l', xlab = expression(theta[1]), ylab = expression(theta[2]))
points(theta[1, 1], theta[2, 1], col = "red", cex = 3)

library(coda)
# visualize each dim
par(mfrow = c(2, 2))

for(k in 1:2){
  plot(theta[k, ], type = 'l', xlab = "iteration", ylab = expression(theta), main = paste("gibbs samples for theta", k))
}

for(k in 1:2){
  plot(theta[k, 1:30], type = 'l', xlab = "iteration", ylab = expression(theta), main = paste("first 30 iterations, theta", k))
}

# choose burnin
par(mfrow = c(1, 3))
burnin <- 30
theta <- theta[, burnin:dim(theta)[2]]
for(k in 1:2){
  plot(theta[k, ], type = 'l', xlab = "iteration", ylab = expression(theta), main = paste("gibbs samples for theta", k, "(after burnin)"))
  hist(theta[k, ], xlab = expression(theta), freq = FALSE, main = paste("theta", k))
  lines(seq(-3, 3, length.out = 100), dnorm(seq(-3, 3, length.out = 100)), col = "red")
  acf(theta[k, ])
}

for(k in 1:2){
  print(paste("Effective sample size for theta", k, "is", effectiveSize(theta[k, ])))
}
## [1] "Effective sample size for theta 1 is 209.924170799959"
## [1] "Effective sample size for theta 2 is 220.527568827683"

Gibbs Sampling of Beta-Binomial

n = 20
alpha = 0.5
beta = 0.5
Niter = 5000
x = rep(1, Niter)
y = rep(0.5, Niter)
for(k in 2:Niter){
  x[k] = rbinom(1, n, y[k-1])
  y[k] = rbeta(1, alpha + x[k], n - x[k] + beta)
}
plot(jitter(x), y, pch = 19, cex = 0.2, xlab = 'x', ylab = 'y')

# visualize each dim
par(mfrow = c(2, 2))
plot(y, type = 'l', xlab = "iteration", ylab = "y", main = "gibbs samples for y")
plot(y[1:30], type = 'l', xlab = "iteration", ylab = "y", main = "first 30 iterations")
plot(y[1:80], type = 'l', xlab = "iteration", ylab = "y", main = "first 80 iterations")
plot(y[1:200], type = 'l', xlab = "iteration", ylab = "y", main = "first 200 iterations")

par(mfrow = c(1, 3))
acf(y, main = "no thinning")
acf(y[seq(1, length(y), by = 5)], main = "thin by 5")
acf(y[seq(1, length(y), by = 20)], main = 'thin by 20')

print(paste("Effective sample size for y is", effectiveSize(y)))
## [1] "Effective sample size for y is 139.518663451208"

Metropolis Hastings

library(mvtnorm)
mcmc_MH <- function(Niter, proposal_sigma, mu_init, LL){
    mu_new <- mu_init
    d <- length(mu_new)
    samples_MH <- mu_new
    accept_MH <- 0
    for(iter in 1: Niter){
        if(d > 1){
            propose_MH_mu <- as.numeric(rmvnorm(1, mean = mu_new, sigma = diag(rep(proposal_sigma^2, d))))
        }
        if(d == 1){
            propose_MH_mu <- rnorm(1, mean = mu_new, sd = proposal_sigma)
        }
        rtemp <- LL(propose_MH_mu) - LL(mu_new)
        if(runif(1) < exp(rtemp)){
            mu_new <- propose_MH_mu
            accept_MH <- accept_MH + 1
        }
        samples_MH <- rbind(samples_MH, mu_new)
    }
    return(list(samples = samples_MH, acceptance = accept_MH /Niter))
}

Metropolis-Hastings toy example

LL <- function(x){
  sum(dnorm(x, log = TRUE))
}
Niter = 1000
proposal_sigma = 2 # try other values
mu_init = c(3, 3) # try other values
samplesMH = mcmc_MH (Niter, proposal_sigma, mu_init, LL)
plot(apply(samplesMH$samples, 2, jitter), type = 'l', xlab = '', ylab = '')
points(t(mu_init), cex = 2, col = 'red')

par(mfrow = c(2, 3))
for(k in 1:2){
  plot(jitter(samplesMH$samples[, k]), type = 'l', xlab = '', ylab = '')
  hist(samplesMH$samples[, k], xlab = '', main = '', freq = FALSE)
  lines(seq(-3, 3, length.out = 100), dnorm(seq(-3, 3, length.out = 100)), col = "red")
  acf(samplesMH$samples[, k])
  print(paste("Effective sample size", k, " is", effectiveSize(samplesMH$samples[, k])))
}
## [1] "Effective sample size 1  is 126.447072390848"

## [1] "Effective sample size 2  is 170.754497054289"
print(paste("Acceptance rate is", samplesMH$acceptance))
## [1] "Acceptance rate is 0.32"

try different starting values

Niter = 1000
proposal_sigma = 2 
mu_init_vals = cbind(c(6, 6, -6, -6), c(6, -6, 6, -6))
burnin = 100
samplesMHmulti = apply(mu_init_vals, 1, function(mu_init) mcmc_MH (Niter, proposal_sigma, mu_init, LL))
idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(jj in 1:4){
  par(mfrow = c(1, 1))
  samplesMH = samplesMHmulti[[jj]]
  plot(apply(samplesMH$samples, 2, jitter), type = 'l', xlab = '', ylab = '', main = paste("Acceptance rate is", samplesMH$acceptance))
  points(t(samplesMH$samples[, 1]), cex = 2, col = 'red')
  par(mfrow = c(2, 2))
  for(k in 1:2){
    plot(jitter(samplesMH$samples[, k]), type = 'l', xlab = '', ylab = '', main = paste("dim", k))
    plot(jitter(samplesMH$samples[idxburn, k]), type = 'l', xlab = '', ylab = '', main = paste("Effective sample size", k, " is", round(effectiveSize(samplesMH$samples[idxburn, k]), 2)))
    hist(samplesMH$samples[idxburn, k], xlab = '', main = '', freq = FALSE)
    lines(seq(-3, 3, length.out = 100), dnorm(seq(-3, 3, length.out = 100)), col = "red")
    acf(samplesMH$samples[idxburn, k])
    }
}

## Geweke diagnostics
for(k in 1:4){
  print(geweke.diag(samplesMHmulti[[k]]$samples))
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##  var1  var2 
## 1.496 1.101 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
##  1.3886 -0.6951 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -1.2186  0.4988 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.6942 -1.2734
# after burnin
idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(k in 1:4){
  print(geweke.diag(samplesMHmulti[[k]]$samples[idxburn,]))
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.3984 -0.1541 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##     var1     var2 
## -0.03934  0.02760 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   var1   var2 
## 0.4089 0.3466 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   var1   var2 
## 0.4322 0.6646
par(mfrow = c(1, 1))
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[, dimselect])
  }
  plot(samples4chains[, 1], xlab = 'iteration', ylab = '', main = 'compare 4 chains', col = 1, type = "l", ylim = range(samples4chains))
  for(jj in 2:4){
    lines(samples4chains[, jj], col = jj)
  }
  legend("topright", col = 1:4, paste("chain", 1:4), lty = rep(1, 4))
  
  idxinit <- 1:30
  plot(samples4chains[idxinit, 1], xlab = 'iteration', ylab = '', main = 'compare 4 chains', col = 1, type = "l", ylim = range(samples4chains))
  for(jj in 2:4){
    lines(samples4chains[idxinit, jj], col = jj)
  }
  legend("topright", col = 1:4, paste("chain", 1:4), lty = rep(1, 4))
}

# analyze variance

# 1. do not break one chain into two
burnin <- 100
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    idxburn <- burnin:dim(samplesMHmulti[[jj]]$samples)[1]
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[idxburn, dimselect])
  }
  within_chain_var <- apply(samples4chains, 2, var)
  means_chains <- apply(samples4chains, 2, mean)
  between_chain_var <- var(means_chains)
  print(paste("dimension", dimselect))
  print(paste("Between chain variance is", round(between_chain_var, 3)))
  print("Within chain variance is")
  print(round(within_chain_var, 3))
}
## [1] "dimension 1"
## [1] "Between chain variance is 0.021"
## [1] "Within chain variance is"
## [1] 1.130 0.808 0.872 1.025
## [1] "dimension 2"
## [1] "Between chain variance is 0.001"
## [1] "Within chain variance is"
## [1] 1.178 0.994 1.061 0.969
# 2. break each chain into two
burnin <- 100
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    idxburn <- burnin:dim(samplesMHmulti[[jj]]$samples)[1]
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[idxburn, dimselect])
  }
  halfnum <- floor(dim(samples4chains)[1]/2)
  samples4chains <- cbind(samples4chains[1:halfnum, ], samples4chains[(halfnum+1):(2*halfnum), ])
  within_chain_var <- apply(samples4chains, 2, var)
  means_chains <- apply(samples4chains, 2, mean)
  between_chain_var <- var(means_chains)
  print(paste("dimension", dimselect))
  print(paste("Between chain variance is", round(between_chain_var, 3)))
  print("Within chain variance is")
  print(round(within_chain_var, 3))
}
## [1] "dimension 1"
## [1] "Between chain variance is 0.025"
## [1] "Within chain variance is"
## [1] 1.048 0.755 0.770 1.101 1.211 0.839 0.965 0.945
## [1] "dimension 2"
## [1] "Between chain variance is 0.007"
## [1] "Within chain variance is"
## [1] 1.078 1.010 1.123 1.088 1.240 0.979 1.000 0.851
mcmclist4chains <- mcmc.list(as.mcmc(samplesMHmulti[[1]]$samples), as.mcmc(samplesMHmulti[[2]]$samples), as.mcmc(samplesMHmulti[[3]]$samples), as.mcmc(samplesMHmulti[[4]]$samples))
gelman.diag(mcmclist4chains)
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.03
## [2,]       1.00       1.01
## 
## Multivariate psrf
## 
## 1.01
gelman.plot(mcmclist4chains)

## Geweke diagnostics
for(k in 1:4){
  print(geweke.diag(mcmclist4chains[[k]]))
  geweke.plot(mcmclist4chains[[k]])
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##  var1  var2 
## 1.496 1.101

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
##  1.3886 -0.6951

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -1.2186  0.4988

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.6942 -1.2734

# after burnin
idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(k in 1:4){
  print(geweke.diag(samplesMHmulti[[k]]$samples[idxburn,]))
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.3984 -0.1541 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##     var1     var2 
## -0.03934  0.02760 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   var1   var2 
## 0.4089 0.3466 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   var1   var2 
## 0.4322 0.6646

try different step sizes

Niter = 1000
proposal_sigma = 2 
mu_init = c(0, 0)
burnin = 100
proposal_sigma_vals <- c(0.3, 1, 3, 6)
samplesMHmulti = sapply(proposal_sigma_vals, function(proposal_sigma) mcmc_MH (Niter, proposal_sigma, mu_init, LL), simplify = FALSE)

visualize results

idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(jj in 1:4){
  par(mfrow = c(1, 1))
  samplesMH = samplesMHmulti[[jj]]
  plot(apply(samplesMH$samples, 2, jitter), type = 'l', xlab = '', ylab = '', main = paste("Acceptance rate is", samplesMH$acceptance))
  points(t(samplesMH$samples[, 1]), cex = 2, col = 'red')
  par(mfrow = c(2, 2))
  for(k in 1:2){
    plot(jitter(samplesMH$samples[, k]), type = 'l', xlab = '', ylab = '')
    plot(jitter(samplesMH$samples[idxburn, k]), type = 'l', xlab = '', ylab = '', main = paste("Effective sample size", k, " is", round(effectiveSize(samplesMH$samples[idxburn, k]), 2)))
    hist(samplesMH$samples[idxburn, k], xlab = '', main = '', freq = FALSE)
    lines(seq(-3, 3, length.out = 100), dnorm(seq(-3, 3, length.out = 100)), col = "red")
    acf(samplesMH$samples[idxburn, k], main = '')
    }
}

par(mfrow = c(1, 1))
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[, dimselect])
  }
  plot(samples4chains[, 1], xlab = 'iteration', ylab = '', main = 'compare 4 chains', col = 1, type = "l", ylim = range(samples4chains))
  for(jj in 2:4){
    lines(samples4chains[, jj], col = jj)
  }
  legend("topright", col = 1:4, paste("chain", 1:4), lty = rep(1, 4))
  
  idxinit <- 1:30
  plot(samples4chains[idxinit, 1], xlab = 'iteration', ylab = '', main = 'compare 4 chains', col = 1, type = "l", ylim = range(samples4chains))
  for(jj in 2:4){
    lines(samples4chains[idxinit, jj], col = jj)
  }
  legend("topright", col = 1:4, paste("chain", 1:4), lty = rep(1, 4))
}

# Calculate R hat
burnin <- 100
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    idxburn <- burnin:dim(samplesMHmulti[[jj]]$samples)[1]
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[idxburn, dimselect])
  }
  halfnum <- floor(dim(samples4chains)[1]/2)
  samples4chains <- cbind(samples4chains[1:halfnum, ], samples4chains[(halfnum+1):(2*halfnum), ])
  within_chain_var <- apply(samples4chains, 2, var)
  means_chains <- apply(samples4chains, 2, mean)
  between_chain_var <- var(means_chains)
  print(paste("dimension", dimselect))
  print(paste("Between chain variance is", round(between_chain_var, 3)))
  print("Within chain variance is")
  print(round(within_chain_var, 3))
}
## [1] "dimension 1"
## [1] "Between chain variance is 0.134"
## [1] "Within chain variance is"
## [1] 0.559 1.081 0.587 0.322 1.179 0.993 0.669 0.824
## [1] "dimension 2"
## [1] "Between chain variance is 0.011"
## [1] "Within chain variance is"
## [1] 0.703 0.889 0.722 1.057 0.840 0.793 0.939 0.990
mcmclist4chains <- mcmc.list(as.mcmc(samplesMHmulti[[1]]$samples), as.mcmc(samplesMHmulti[[2]]$samples), as.mcmc(samplesMHmulti[[3]]$samples), as.mcmc(samplesMHmulti[[4]]$samples))
gelman.diag(mcmclist4chains)
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.11       1.29
## [2,]       1.01       1.05
## 
## Multivariate psrf
## 
## 1.12
gelman.plot(mcmclist4chains)

## Geweke diagnostics
for(k in 1:4){
  print(geweke.diag(mcmclist4chains[[k]]))
  geweke.plot(mcmclist4chains[[k]])
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.8755 -2.0223

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
##  1.1565 -0.6244

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.6528  0.2308

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##     var1     var2 
## -0.09499 -0.27813

# after burnin
idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(k in 1:4){
  print(geweke.diag(samplesMHmulti[[k]]$samples[idxburn,]))
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -3.4830  0.6443 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.6974 -0.1809 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   var1   var2 
## 0.3939 1.4149 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   var1   var2 
## 0.8648 2.1274

for multiple chains with same proposal, calculate R hat (within and between chain variabilities)

Niter = 1000
proposal_sigma = 2 
mu_init = c(0, 0)
burnin = 100
samplesMHmulti = sapply(1:4, function(proposal_sigma) mcmc_MH (Niter, proposal_sigma, mu_init, LL), simplify = FALSE)
# 1. do not break one chain into two
burnin <- 100
idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    idxburn <- burnin:dim(samplesMHmulti[[jj]]$samples)[1]
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[idxburn, dimselect])
  }
  within_chain_var <- mean(apply(samples4chains, 2, var))
  means_chains <- apply(samples4chains, 2, mean)
  n = length(idxburn)
  between_chain_var <- var(means_chains) * n
  Rstat <- sqrt(((1 - 1/n) * within_chain_var + between_chain_var /n)/within_chain_var)
  print(paste("dimension", dimselect))
  print(paste("Between chain variance is", round(between_chain_var, 3)))
  print(paste("Within chain variance is", round(within_chain_var, 3)))
  print(paste("Rhat is", round(Rstat, 3)))
}
## [1] "dimension 1"
## [1] "Between chain variance is 7.804"
## [1] "Within chain variance is 0.886"
## [1] "Rhat is 1.004"
## [1] "dimension 2"
## [1] "Between chain variance is 13.743"
## [1] "Within chain variance is 0.943"
## [1] "Rhat is 1.007"
# 2. break each chain into two
burnin <- 100
for(dimselect in 1:2){
  samples4chains = c()
  for(jj in 1:4){
    idxburn <- burnin:dim(samplesMHmulti[[jj]]$samples)[1]
    samples4chains <- cbind(samples4chains, samplesMHmulti[[jj]]$samples[idxburn, dimselect])
  }
  halfnum <- floor(dim(samples4chains)[1]/2)
  samples4chains <- cbind(samples4chains[1:halfnum, ], samples4chains[(halfnum+1):(2*halfnum), ])
  within_chain_var <- mean(apply(samples4chains, 2, var))
  means_chains <- apply(samples4chains, 2, mean)
  n = halfnum
  between_chain_var <- var(means_chains) * n
  Rstat <- sqrt(((1 - 1/n) * within_chain_var + between_chain_var / n)/within_chain_var)
  print(paste("dimension", dimselect))
  print(paste("Between chain variance is", round(between_chain_var, 3)))
  print(paste("Within chain variance is", round(within_chain_var, 3)))
  print(paste("Rhat is", round(Rstat, 3)))
}
## [1] "dimension 1"
## [1] "Between chain variance is 6.411"
## [1] "Within chain variance is 0.881"
## [1] "Rhat is 1.007"
## [1] "dimension 2"
## [1] "Between chain variance is 10.625"
## [1] "Within chain variance is 0.935"
## [1] "Rhat is 1.011"
mcmclist4chains <- mcmc.list(as.mcmc(samplesMHmulti[[1]]$samples), as.mcmc(samplesMHmulti[[2]]$samples), as.mcmc(samplesMHmulti[[3]]$samples), as.mcmc(samplesMHmulti[[4]]$samples))
gelman.diag(mcmclist4chains)
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]       1.01       1.04
## [2,]       1.02       1.05
## 
## Multivariate psrf
## 
## 1.02
gelman.plot(mcmclist4chains)

## Geweke diagnostics
for(k in 1:4){
  print(geweke.diag(mcmclist4chains[[k]]))
  geweke.plot(mcmclist4chains[[k]])
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##     var1     var2 
##  0.07346 -0.82772

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
##  0.7323 -0.2362

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.9449  0.5986

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
##  0.5923 -0.7085

# after burnin
idxburn <- burnin:dim(samplesMHmulti[[1]]$samples)[1]
for(k in 1:4){
  print(geweke.diag(samplesMHmulti[[k]]$samples[idxburn,]))
}
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.2862 -0.7265 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
##  0.1408 -0.9675 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -0.1198 -0.9092 
## 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##    var1    var2 
## -1.4935  0.1664